MAP TREE ATTRIBUTES USING RAPIDEYE AND LANDSAT 8 IMAGES AND kNN METHOD

## [1] "C:/Users/oyelo/Desktop/advanced remote sensing/exercise 3/BIODEV training manual, data and scripts-20170403/R/R/Data"

RAPIDEYE IMAGE

’

# "re_refl.tiff" is a spatial subset of Rapid Eye satellite image

# load multiband satellite image stack
rapideye<- stack("C:/Users/oyelo/Desktop/advanced remote sensing/exercise 3/BIODEV training manual, data and scripts-20170403/R/R/Data/re_refl.tif")
rapideye#see properties of RasterStack object
## class       : RasterStack 
## dimensions  : 3510, 3720, 13057200, 5  (nrow, ncol, ncell, nlayers)
## resolution  : 5, 5  (x, y)
## extent      : 605385, 623985, 1289655, 1307205  (xmin, xmax, ymin, ymax)
## coord. ref. : +proj=utm +zone=30 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0 
## names       : re_refl.1, re_refl.2, re_refl.3, re_refl.4, re_refl.5 
## min values  :        11,       362,       411,       827,       922 
## max values  :      6928,      7321,      7326,      7059,      7456
# aggregare ERapid EYe
rapideye.30m<-aggregate(rapideye, fact=6, fun=mean)
rapideye.30m
## class       : RasterBrick 
## dimensions  : 585, 620, 362700, 5  (nrow, ncol, ncell, nlayers)
## resolution  : 30, 30  (x, y)
## extent      : 605385, 623985, 1289655, 1307205  (xmin, xmax, ymin, ymax)
## coord. ref. : +proj=utm +zone=30 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0 
## data source : C:\Users\oyelo\AppData\Local\Temp\RtmpGQreMU\raster\r_tmp_2017-04-16_034232_11664_67147.grd 
## names       : re_refl.1, re_refl.2, re_refl.3, re_refl.4, re_refl.5 
## min values  :  182.2500,  522.5000,  552.5278, 1039.5556, 1092.5278 
## max values  :  2946.083,  3598.556,  4071.917,  4346.778,  5084.139
#remove unnecessary files
rm(rapideye)

# the image has five spectral bands corresponding to blue, green, red, red edge
# near infrared (NIR)  parts of the electromagnetic
# spectrum

# rename bands
bands_re <- c("b.re","g.re","r.re","rededge","nir.re")
names(rapideye.30m) <- bands_re


par(mfrow=c(1,2))
# visualize the image
plotRGB(rapideye.30m, r="r.re", g="g.re", b="b.re", stretch="lin", main="RE truecolour")

# this is so called "true-color" composite as it best corresponds to colors seen by eye

# several "false-color" composites exist for visualization
# here, green vegetation is seen in red (strong reflectance in near infrared)
plotRGB(rapideye.30m, r="nir.re", g="r.re", b="g.re", stretch="lin", main="RE false colour")

LANDSAT IMAGE

# "Landsat.tiff" is a spatial subset of Landsat 8 OLI satellite image
# acquired 11 October 2013

# load multiband satellite image stack
landsat<- stack("C:/Users/oyelo/Desktop/advanced remote sensing/exercise 3/BIODEV training manual, data and scripts-20170403/R/R/Data/ls8_sr_2014_02_16.tif")
landsat#see properties of RasterStack object
## class       : RasterStack 
## dimensions  : 585, 620, 362700, 6  (nrow, ncol, ncell, nlayers)
## resolution  : 30, 30  (x, y)
## extent      : 605385, 623985, 1289655, 1307205  (xmin, xmax, ymin, ymax)
## coord. ref. : +proj=utm +zone=30 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0 
## names       : ls8_sr_2014_02_16.1, ls8_sr_2014_02_16.2, ls8_sr_2014_02_16.3, ls8_sr_2014_02_16.4, ls8_sr_2014_02_16.5, ls8_sr_2014_02_16.6 
## min values  :                 484,                 726,                 876,                1614,                2005,                1420 
## max values  :                1578,                2298,                3041,                3977,                7361,               13529
# the image has six spectral bands corresponding to blue, green, red, 
# near infrared (NIR) and short waveinfrared (SWIR) parts of the electromagnetic
# spectrum

# rename bands
bands_l8 <- c("b","g","r","nir","swir1","swir2")
names(landsat) <- bands_l8


par(mfrow=c(1,2))
# visualize the image
plotRGB(landsat, r="r", g="g", b="b", stretch="lin", main="LS8 true colour")
# the image covers 12 km x 12 km area around sentinel site mid-point
# this is so called "true-color" composite as it best corresponds to colors seen by eye

# several "false-color" composites exist for visualization
# here, green vegetation is seen in red (strong reflectance in near infrared)
plotRGB(landsat, r="nir", g="r", b="g", stretch="lin", main="LS8 false colour")

JOIN FIELD DATA WITH COORDINATES FOR RAPIDEYE

# merge Plot.data with plots (coordinates only)
data <- merge(Plot.data, plots[c("plot.id","lat","lon")], by="plot.id", all.x=TRUE)

# reproject coordinates from Lat/Lon to UTM
# add columns for "utm_x" and "utm_y"
data[c("utm_x","utm_y")] <- as.data.frame(project(cbind(data$lon, data$lat),
                                                  "+proj=utm +zone=30 ellps=WGS84"))

# plot image with plots
plotRGB(rapideye.30m, r=4, g=3, b=2, stretch="lin", main="RE with plots")
points(data$utm_x, data$utm_y, pch=19, col="yellow")

JOIN FIELD DATA WITH COORDINATES FOR LANDSAT8

# merge Plot.data with plots (coordinates only)
data <- merge(Plot.data, plots[c("plot.id","lat","lon")], by="plot.id", all.x=TRUE)

# reproject coordinates from Lat/Lon to UTM
# add columns for "utm_x" and "utm_y"
data[c("utm_x","utm_y")] <- as.data.frame(project(cbind(data$lon, data$lat),
                                                  "+proj=utm +zone=30 ellps=WGS84"))

# plot image with plots
plotRGB(landsat, r=4, g=3, b=2, stretch="lin", main="LS 8 with plots")
points(data$utm_x, data$utm_y, pch=19, col="yellow")

EXTRACT PIXEL-VALUES FROM RAPIDEYE IMAGE

# for further analysis, we need to extract pixel values for the plots
data[bands_re] <- as.data.frame(extract(rapideye.30m, cbind(data$utm_x, data$utm_y),
                                        method='bilinear'))

# see how data looks now
head(data)
##   plot.id cluster plot count.ha    ba.ha    agb.ha   bgb.ha   agc.ha
## 1     101       1    1      120 2.774560  8.565222 2.355436 4.025654
## 2     102       1    2       20 1.517970  7.105600 1.954040 3.339632
## 3     103       1    3      240 2.021866  5.727592 1.575088 2.691968
## 4     104       1    4       20 2.202256 11.602759 3.190759 5.453297
## 5     105       1    5       30 2.387948 10.386064 2.856168 4.881450
## 6     106       1    6       40 3.362431 15.486342 4.258744 7.278581
##      bgc.ha      v.ha  dbh.mean dbh.qdr.mean    h.mean  h.loreys
## 1 1.1070550  7.449620 16.683333     18.56205  6.231667  6.392793
## 2 0.9183987  6.245248 30.900000     31.63927  9.350000  9.795736
## 3 0.7402913  4.979081  8.583333     16.99223  3.882083  5.863372
## 4 1.4996566 10.312879 37.400000     37.57286 10.900000 11.149689
## 5 1.3423988  9.120726 31.500000     32.84413  8.800000  9.094006
## 6 2.0016097 13.900897 32.200000     34.35429  9.162500  9.843287
##   count.ha.sdt ba.ha.sdt agb.ha.sdt bgb.ha.sdt agc.ha.sdt bgc.ha.sdt
## 1            0         0          0          0          0          0
## 2            0         0          0          0          0          0
## 3            0         0          0          0          0          0
## 4            0         0          0          0          0          0
## 5            0         0          0          0          0          0
## 6            0         0          0          0          0          0
##   v.ha.sdt count.ha.ddw agb.ha.ddw v.ha.ddw agc.ha.ddw    c.can      lat
## 1        0            0  0.0000000 0.000000 0.00000000 18.50408 11.69887
## 2        0            0  0.0000000 0.000000 0.00000000 12.29554 11.70358
## 3        0          100  0.1403026 2.775008 0.06594222 20.16882 11.69863
## 4        0            0  0.0000000 0.000000 0.00000000 10.68749 11.70473
## 5        0            0  0.0000000 0.000000 0.00000000 13.00654 11.70902
## 6        0            0  0.0000000 0.000000 0.00000000 22.17338 11.70736
##         lon    utm_x   utm_y      b.re     g.re     r.re  rededge   nir.re
## 1 -1.982213 610922.2 1293456  819.4953 1366.672 2012.315 2481.336 3365.917
## 2 -1.982173 610924.7 1293977 1229.4334 1855.866 2637.134 3025.833 3762.842
## 3 -1.978797 611294.6 1293430  851.0370 1334.529 1921.262 2374.430 3161.094
## 4 -1.975357 611667.1 1294106 1104.8116 1758.811 2561.255 2980.392 3798.727
## 5 -1.977112 611474.1 1294580  839.8548 1343.992 1987.241 2478.641 3291.341
## 6 -1.976407 611551.6 1294397  870.4163 1422.145 2121.354 2550.309 3316.303

EXTRACT PIXEL-VALUES FROM LANDSAT8 IMAGE

# for further analysis, we need to extract pixel values for the plots
data[bands_l8] <- as.data.frame(extract(landsat, cbind(data$utm_x, data$utm_y),
                                        method='bilinear'))
# see how data looks now
head(data)
##   plot.id cluster plot count.ha    ba.ha    agb.ha   bgb.ha   agc.ha
## 1     101       1    1      120 2.774560  8.565222 2.355436 4.025654
## 2     102       1    2       20 1.517970  7.105600 1.954040 3.339632
## 3     103       1    3      240 2.021866  5.727592 1.575088 2.691968
## 4     104       1    4       20 2.202256 11.602759 3.190759 5.453297
## 5     105       1    5       30 2.387948 10.386064 2.856168 4.881450
## 6     106       1    6       40 3.362431 15.486342 4.258744 7.278581
##      bgc.ha      v.ha  dbh.mean dbh.qdr.mean    h.mean  h.loreys
## 1 1.1070550  7.449620 16.683333     18.56205  6.231667  6.392793
## 2 0.9183987  6.245248 30.900000     31.63927  9.350000  9.795736
## 3 0.7402913  4.979081  8.583333     16.99223  3.882083  5.863372
## 4 1.4996566 10.312879 37.400000     37.57286 10.900000 11.149689
## 5 1.3423988  9.120726 31.500000     32.84413  8.800000  9.094006
## 6 2.0016097 13.900897 32.200000     34.35429  9.162500  9.843287
##   count.ha.sdt ba.ha.sdt agb.ha.sdt bgb.ha.sdt agc.ha.sdt bgc.ha.sdt
## 1            0         0          0          0          0          0
## 2            0         0          0          0          0          0
## 3            0         0          0          0          0          0
## 4            0         0          0          0          0          0
## 5            0         0          0          0          0          0
## 6            0         0          0          0          0          0
##   v.ha.sdt count.ha.ddw agb.ha.ddw v.ha.ddw agc.ha.ddw    c.can      lat
## 1        0            0  0.0000000 0.000000 0.00000000 18.50408 11.69887
## 2        0            0  0.0000000 0.000000 0.00000000 12.29554 11.70358
## 3        0          100  0.1403026 2.775008 0.06594222 20.16882 11.69863
## 4        0            0  0.0000000 0.000000 0.00000000 10.68749 11.70473
## 5        0            0  0.0000000 0.000000 0.00000000 13.00654 11.70902
## 6        0            0  0.0000000 0.000000 0.00000000 22.17338 11.70736
##         lon    utm_x   utm_y      b.re     g.re     r.re  rededge   nir.re
## 1 -1.982213 610922.2 1293456  819.4953 1366.672 2012.315 2481.336 3365.917
## 2 -1.982173 610924.7 1293977 1229.4334 1855.866 2637.134 3025.833 3762.842
## 3 -1.978797 611294.6 1293430  851.0370 1334.529 1921.262 2374.430 3161.094
## 4 -1.975357 611667.1 1294106 1104.8116 1758.811 2561.255 2980.392 3798.727
## 5 -1.977112 611474.1 1294580  839.8548 1343.992 1987.241 2478.641 3291.341
## 6 -1.976407 611551.6 1294397  870.4163 1422.145 2121.354 2550.309 3316.303
##           b        g        r      nir    swir1    swir2
## 1  951.6492 1394.592 1817.576 3142.439 3840.167 2950.875
## 2 1204.6968 1756.546 2293.718 3413.576 4432.911 3835.910
## 3  973.9948 1424.100 1842.532 3055.005 3866.137 3044.507
## 4 1154.8354 1697.474 2218.241 3519.053 4218.354 3540.430
## 5  990.9266 1443.459 1904.928 3198.101 3982.312 3107.111
## 6 1083.4366 1569.858 2078.348 3241.671 4099.756 3256.872

ACCURACY ASSESSMENT BY CROSS-VALIDATION FOR RAPIDEYE

# test accuracy for several variables
# make vector of y-variables
yvars <- c("ba.ha","agb.ha","h.mean","c.can")
k <- 3
method <- "msn"
xvars <- bands_re
par(mfrow=c(2,2)) #four variables are plotted in four panels
res_re <- c()
for(i in 1:length(yvars)){
  res_re <- rbind(res_re, cv.plot(data, yvars[i], xvars, k, method)) 
}
## [1] "ba.ha"
## BIAS: -0.163 (-3.05%)
## RMSE: 2.79 (52.2%)
## Pseudo R.squared: 0.468
## [1] "agb.ha"
## BIAS: 0.103 (0.55%)
## RMSE: 11.7 (62.4%)
## Pseudo R.squared: 0.375
## [1] "h.mean"
## BIAS: 0.0787 (1.29%)
## RMSE: 2.4 (39.4%)
## Pseudo R.squared: 0.00517
## [1] "c.can"
## BIAS: 0.298 (1.1%)
## RMSE: 13.8 (51.2%)
## Pseudo R.squared: 0.458

ACCURACY ASSESSMENT BY CROSS-VALIDATION FOR LANDSAT8

# test accuracy for several variables
# make vector of y-variables
yvars <- c("ba.ha","agb.ha","h.mean","c.can")
k <- 3
method <- "msn"
xvars <- bands_l8
par(mfrow=c(2,2)) #four variables are plotted in four panels
res_l8 <- c()
for(i in 1:length(yvars)){
  res_l8 <- rbind(res_l8, cv.plot(data, yvars[i], xvars, k, method)) 
}
## [1] "ba.ha"
## BIAS: 0.0793 (1.48%)
## RMSE: 2.52 (47.1%)
## Pseudo R.squared: 0.564
## [1] "agb.ha"
## BIAS: 0.0404 (0.216%)
## RMSE: 11.1 (59.2%)
## Pseudo R.squared: 0.424
## [1] "h.mean"
## BIAS: -0.0183 (-0.3%)
## RMSE: 2.17 (35.6%)
## Pseudo R.squared: 0.0732
## [1] "c.can"
## BIAS: 0.286 (1.06%)
## RMSE: 13.4 (49.7%)
## Pseudo R.squared: 0.484

From the below, it can be seen that the landsat 8 is more accurate than the rapideye as it is the RMSE values are lower which shows that the residuals (i.e the difference between the measured/observed values and the predictions) are lower. Although, the differences are not so much, it can be deduced that the landsat 8 provide more accurate predictions for the Basal area, the above-ground biomass, mean height of trees and the canopy cover. Hence, it would be more appropriate.

# organized results for rapideye
res_re <- cbind(yvars, res_re)
res_re <- as.data.frame(res_re)
names(res_re) <- c("Attribute","Bias","Bias (%)","RMSE","RMSE (%)","R.squared")
res_re
##   Attribute   Bias Bias (%) RMSE RMSE (%) R.squared
## 1     ba.ha -0.163    -3.05 2.79     52.2     0.468
## 2    agb.ha  0.103     0.55 11.7     62.4     0.375
## 3    h.mean 0.0787     1.29  2.4     39.4   0.00517
## 4     c.can  0.298      1.1 13.8     51.2     0.458
# organized results for landsat8
res_l8 <- cbind(yvars, res_l8)
res_l8 <- as.data.frame(res_l8)
names(res_l8) <- c("Attribute","Bias","Bias (%)","RMSE","RMSE (%)","R.squared")
res_l8
##   Attribute    Bias Bias (%) RMSE RMSE (%) R.squared
## 1     ba.ha  0.0793     1.48 2.52     47.1     0.564
## 2    agb.ha  0.0404    0.216 11.1     59.2     0.424
## 3    h.mean -0.0183     -0.3 2.17     35.6    0.0732
## 4     c.can   0.286     1.06 13.4     49.7     0.484

PREDICT ATTRIBUTES TO PIXELS FOR RAPIDEYE

# create yai-object
# select vegetation attributes
yvars <- c("agb.ha","c.can")
xvars <- bands_re
y <- as.data.frame(data[,yvars])
x <- as.data.frame(data[,xvars])
k = 3
method = "msn"
msn <- yai(x=x, y=y, k=k, data=data, method=method)

# convert RapidEye image to data frame
rapideye.30m.df <- as.data.frame(rapideye.30m)

# add 160 observations to data frame
r160 <- as.data.frame(matrix(nrow=160, ncol=5, 0))
names(r160) <- bands_re
rapideye.30m.df <- rbind(r160, rapideye.30m.df)

# predict values for pixels
pred <- predict(msn, newdata=rapideye.30m.df, k=3, method="dstWeighted", observed=FALSE)
# ignore warning: this is why 160 rows were added above 


#RapidEye
# convert data frame back to raster attribute by attribute
# nrow and ncol depend on the image size
# set extent and projection based on the image
# aboveground biomass
agb_ha <- raster(matrix(pred$agb.ha, nrow=585, ncol=620, byrow=TRUE))
extent(agb_ha) <- rapideye.30m
projection(agb_ha) <- projection(rapideye.30m)

# canopy cover
cc <- raster(matrix(pred$c.can, nrow=585, ncol=620, byrow=TRUE))
extent(cc) <- rapideye.30m
projection(cc) <- projection(rapideye.30m)

# plot rasters
par(mfrow=c(1,2))
plot(agb_ha, main="Aboveground biomass (Mg/ha)")
plot(cc, main="Canopy cover (%)")

# compute mean based on the raster
round(cellStats(agb_ha, stat='mean'), 2)
## [1] 15.66
round(cellStats(cc, stat='mean'), 2)
## [1] 23.76
# should be close to the field based estimate

PREDICT ATTRIBUTES TO PIXELS FOR LANDSAT8

# create yai-object
# select vegetation attributes
yvars <- c("agb.ha","c.can")
xvars <- bands_l8
y <- as.data.frame(data[,yvars])
x <- as.data.frame(data[,xvars])
k = 3
method = "msn"
msn <- yai(x=x, y=y, k=k, data=data, method=method)

# convert Landsat image to data frame
landsat.df <- as.data.frame(landsat)

# add 160 observations to data frame
r160 <- as.data.frame(matrix(nrow=160, ncol=6, 0))
names(r160) <- bands_l8
landsat.df <- rbind(r160, landsat.df)

# predict values for pixels
pred <- predict(msn, newdata=landsat.df, k=3, method="dstWeighted", observed=FALSE)
# ignore warning: this is why 160 rows were added above 


#Lansaat
# convert data frame back to raster attribute by attribute
# nrow and ncol depend on the image size
# set extent and projection based on the image

# aboveground biomass
agb_ha <- raster(matrix(pred$agb.ha, nrow=585, ncol=620, byrow=TRUE))
extent(agb_ha) <- landsat
projection(agb_ha) <- projection(landsat)

# canopy cover
cc <- raster(matrix(pred$c.can, nrow=585, ncol=620, byrow=TRUE))
extent(cc) <- landsat
projection(cc) <- projection(landsat)

# plot rasters
par(mfrow=c(1,2))
plot(agb_ha, main="Aboveground biomass (Mg/ha)")
plot(cc, main="Canopy cover (%)")

# compute mean based on the raster
round(cellStats(agb_ha, stat='mean'), 2)
## [1] 16.13
round(cellStats(cc, stat='mean'), 2)
## [1] 23.42
# should be close to the field based estimate

As it can be sean, the rapideye slightly underestimated the mean aboveground biomass and canopy cover. As mentioned earlier ,the rapideye has higher residual values and RMSE which reflects in the underestimation. Therefore, overall, the landsat 8 has better accuracy than the rapideye.